HW: Label the entire notebook with comments explaining what operation each code chunk performs and what its outcome is.

# Load the packages used throughout the notebook: tidyverse (the %>% pipe,
# filter() and ggplot()) and plotly (ggplotly() for interactive plots).
library(tidyverse)
library(plotly)

# Read the penguin measurements from the CSV file into a data frame.
Penguins <- read.csv(file = "penguins_size.csv")

We see that the data has 344 rows and 7 columns

# Print the data frame to inspect the raw rows, then report its dimensions.
# Outcome: dim() returns 344 7, i.e. 344 rows and 7 columns.
Penguins
dim(Penguins)
[1] 344   7

Looking at summary stats

# Summarise every column. Outcome: species, island and sex are stored as
# character, and each of the four numeric measurements has 2 missing values.
summary(Penguins)
   species             island          culmen_length_mm culmen_depth_mm flipper_length_mm
 Length:344         Length:344         Min.   :32.10    Min.   :13.10   Min.   :172.0    
 Class :character   Class :character   1st Qu.:39.23    1st Qu.:15.60   1st Qu.:190.0    
 Mode  :character   Mode  :character   Median :44.45    Median :17.30   Median :197.0    
                                       Mean   :43.92    Mean   :17.15   Mean   :200.9    
                                       3rd Qu.:48.50    3rd Qu.:18.70   3rd Qu.:213.0    
                                       Max.   :59.60    Max.   :21.50   Max.   :231.0    
                                       NA's   :2        NA's   :2       NA's   :2        
  body_mass_g       sex           
 Min.   :2700   Length:344        
 1st Qu.:3550   Class :character  
 Median :4050   Mode  :character  
 Mean   :4202                     
 3rd Qu.:4750                     
 Max.   :6300                     
 NA's   :2                        

We notice that species, island and sex are read in as character columns. We will convert them to factors.

# Convert the three categorical columns from character to factor in one step,
# so that summary() reports a count per level instead of just "character".
Penguins[c("species", "island", "sex")] <-
  lapply(Penguins[c("species", "island", "sex")], as.factor)

Looking at summary again

# Summarise again now that the categorical columns are factors. Outcome: counts
# per level are shown; sex contains one stray "." entry and 10 missing values.
summary(Penguins)
      species          island    culmen_length_mm culmen_depth_mm flipper_length_mm
 Adelie   :152   Biscoe   :168   Min.   :32.10    Min.   :13.10   Min.   :172.0    
 Chinstrap: 68   Dream    :124   1st Qu.:39.23    1st Qu.:15.60   1st Qu.:190.0    
 Gentoo   :124   Torgersen: 52   Median :44.45    Median :17.30   Median :197.0    
                                 Mean   :43.92    Mean   :17.15   Mean   :200.9    
                                 3rd Qu.:48.50    3rd Qu.:18.70   3rd Qu.:213.0    
                                 Max.   :59.60    Max.   :21.50   Max.   :231.0    
                                 NA's   :2        NA's   :2       NA's   :2        
  body_mass_g       sex     
 Min.   :2700   .     :  1  
 1st Qu.:3550   FEMALE:165  
 Median :4050   MALE  :168  
 Mean   :4202   NA's  : 10  
 3rd Qu.:4750               
 Max.   :6300               
 NA's   :2                  

We will remove rows with missing (NA) values and also any erroneous values for sex.

# Build the cleaned data set:
#   1. na.omit() drops every row that has a missing value in any column.
#   2. filter() keeps only rows whose sex is a valid category, which removes
#      the single erroneous "." record.
#   3. droplevels() discards factor levels that no longer occur. Without it the
#      sex factor would keep "." as an empty level, because filtering rows does
#      not change a factor's level set and as.factor() leaves factors as-is.
Clean_DF <- Penguins %>%
  na.omit() %>%
  filter(sex %in% c("FEMALE", "MALE")) %>%
  droplevels()

This leaves us with the below summary stats:

# Summarise the cleaned data. Outcome: 333 rows remain (146 Adelie, 68 Chinstrap,
# 119 Gentoo) and the numeric columns no longer report any NA's.
summary(Clean_DF)
      species          island    culmen_length_mm culmen_depth_mm flipper_length_mm
 Adelie   :146   Biscoe   :163   Min.   :32.10    Min.   :13.10   Min.   :172      
 Chinstrap: 68   Dream    :123   1st Qu.:39.50    1st Qu.:15.60   1st Qu.:190      
 Gentoo   :119   Torgersen: 47   Median :44.50    Median :17.30   Median :197      
                                 Mean   :43.99    Mean   :17.16   Mean   :201      
                                 3rd Qu.:48.60    3rd Qu.:18.70   3rd Qu.:213      
                                 Max.   :59.60    Max.   :21.50   Max.   :231      
  body_mass_g       sex           
 Min.   :2700   Length:333        
 1st Qu.:3550   Class :character  
 Median :4050   Mode  :character  
 Mean   :4207                     
 3rd Qu.:4775                     
 Max.   :6300                     
# Make sure sex is stored as a factor in the cleaned data (as.factor() returns
# an existing factor unchanged), then summarise again.
# Outcome: sex is reported as counts per category - 165 FEMALE and 168 MALE.
Clean_DF[["sex"]] <- as.factor(Clean_DF[["sex"]])
summary(Clean_DF)
      species          island    culmen_length_mm culmen_depth_mm flipper_length_mm
 Adelie   :146   Biscoe   :163   Min.   :32.10    Min.   :13.10   Min.   :172      
 Chinstrap: 68   Dream    :123   1st Qu.:39.50    1st Qu.:15.60   1st Qu.:190      
 Gentoo   :119   Torgersen: 47   Median :44.50    Median :17.30   Median :197      
                                 Mean   :43.99    Mean   :17.16   Mean   :201      
                                 3rd Qu.:48.60    3rd Qu.:18.70   3rd Qu.:213      
                                 Max.   :59.60    Max.   :21.50   Max.   :231      
  body_mass_g       sex     
 Min.   :2700   FEMALE:165  
 1st Qu.:3550   MALE  :168  
 Median :4050               
 Mean   :4207               
 3rd Qu.:4775               
 Max.   :6300               
# Base-R box plots of the three millimetre measurements (columns 3 to 5),
# drawn side by side to compare their spread and spot outliers.
boxplot(
  Clean_DF[c("culmen_length_mm", "culmen_depth_mm", "flipper_length_mm")]
)

# Body mass gets its own box plot; presumably because its values (thousands of
# grams) are on a very different scale from the millimetre measurements.
boxplot(Clean_DF[["body_mass_g"]])

# Interactive box plots comparing each body measurement between the sexes.
# Each plot is built with ggplot2 and then handed to ggplotly() so that it is
# rendered as an interactive plotly widget.

# Body mass (g) by sex.
p <- ggplot(Clean_DF, aes(x = sex, y = body_mass_g, fill = sex)) +
  geom_boxplot()
ggplotly(p)

# Culmen length (mm) by sex.
p <- ggplot(Clean_DF, aes(x = sex, y = culmen_length_mm, fill = sex)) +
  geom_boxplot()
ggplotly(p)

# Culmen depth (mm) by sex.
p <- ggplot(Clean_DF, aes(x = sex, y = culmen_depth_mm, fill = sex)) +
  geom_boxplot()
ggplotly(p)

# Flipper length (mm) by sex.
p <- ggplot(Clean_DF, aes(x = sex, y = flipper_length_mm, fill = sex)) +
  geom_boxplot()
ggplotly(p)

Looking at body measures by Species

# Interactive box plots comparing each body measurement across the three
# species, one plot per measurement, filled by species.

# Body mass (g) by species.
p <- ggplot(Clean_DF, aes(x = species, y = body_mass_g, fill = species)) +
  geom_boxplot()
ggplotly(p)

# Culmen length (mm) by species.
p <- ggplot(Clean_DF, aes(x = species, y = culmen_length_mm, fill = species)) +
  geom_boxplot()
ggplotly(p)

# Culmen depth (mm) by species.
p <- ggplot(Clean_DF, aes(x = species, y = culmen_depth_mm, fill = species)) +
  geom_boxplot()
ggplotly(p)

# Flipper length (mm) by species.
p <- ggplot(Clean_DF, aes(x = species, y = flipper_length_mm, fill = species)) +
  geom_boxplot()
ggplotly(p)
# Bar chart of the number of penguins recorded per species.
p <- ggplot(Clean_DF) +
  geom_bar(aes(x = species, fill = species))
ggplotly(p)

# Bar chart of the number of penguins per island, with each bar split by
# species to show which species were recorded on which island.
p <- ggplot(Clean_DF) +
  geom_bar(aes(x = island, fill = species))
ggplotly(p)
# Interactive scatter plots of culmen length against the other measurements.
# Point colour encodes the species and point shape encodes the island.

# Culmen length vs culmen depth.
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = culmen_depth_mm,
      color = species,
      shape = island
    )
  )
ggplotly(p)

# Culmen length vs flipper length.
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = flipper_length_mm,
      color = species,
      shape = island
    )
  )
ggplotly(p)

# Culmen length vs body mass.
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = body_mass_g,
      color = species,
      shape = island
    )
  )
ggplotly(p)
# Correlation matrix of the four numeric measurements (columns 3 to 6),
# drawn as a coloured grid with each coefficient printed in white.
library(corrplot)
M <- cor(
  Clean_DF[c(
    "culmen_length_mm",
    "culmen_depth_mm",
    "flipper_length_mm",
    "body_mass_g"
  )]
)
corrplot(M, method = "color", addCoef.col = "white")

LS0tCnRpdGxlOiAiUGVuZ3VpbiBEYXRhIEVEQSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKSFc6IExhYmVsIHRoZSBlbnRpcmUgbm90ZWJvb2sgd2l0aCBjb21tZW50cyBvbiB3aGF0IG9wZXJhdGlvbiBpcyBlYWNoIGNvZGUgY2h1bmsgZG9pbmcgYW5kIHdoYXQgaXMgdGhlIG91dGNvbWUKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShwbG90bHkpClBlbmd1aW5zPC1yZWFkLmNzdigicGVuZ3VpbnNfc2l6ZS5jc3YiKQpgYGAKCldlIHNlZSB0aGF0IHRoZSBkYXRhIGhhcyAzNDQgcm93cyBhbmQgNyBjb2x1bW5zCgpgYGB7cn0KUGVuZ3VpbnMKZGltKFBlbmd1aW5zKQpgYGAKCkxvb2tpbmcgYXQgc3VtbWFyeSBzdGF0cwpgYGB7cn0Kc3VtbWFyeShQZW5ndWlucykKYGBgCldlIG5vdGljZSB0aGF0IFNwZWNpZXMgYW5kIGlzbGFuZCBhcmUgcmVhZCBpbiBhcyBjaGFyYWN0ZXJzLiBXZSB3aWxsIGNvbnZlcnQgdGhlbSB0byBmYWN0b3IuCmBgYHtyfQpQZW5ndWlucyRzcGVjaWVzPC1hcy5mYWN0b3IoUGVuZ3VpbnMkc3BlY2llcykKUGVuZ3VpbnMkaXNsYW5kPC1hcy5mYWN0b3IoUGVuZ3VpbnMkaXNsYW5kKQpQZW5ndWlucyRzZXg8LWFzLmZhY3RvcihQZW5ndWlucyRzZXgpCmBgYAoKTG9va2luZyBhdCBzdW1tYXJ5IGFnYWluCmBgYHtyfQpzdW1tYXJ5KFBlbmd1aW5zKQpgYGAKV2Ugd2lsbCByZW1vdmUgdGhlIG51bGwgdmFsdWVzIGFuZCBhbHNvIGFueSBlcnJvbmVvdXMgdmFsdWVzIGZvciBzZXggIApgYGB7cn0KQ2xlYW5fREY8LW5hLm9taXQoUGVuZ3VpbnMpCkNsZWFuX0RGPC1DbGVhbl9ERiU+JQogIGZpbHRlcihzZXggPT0gJ0ZFTUFMRScgfCBzZXggPT0gJ01BTEUnKQpgYGAKClRoaXMgbGVhdmVzIHVzIHdpdGggdGhlIGJlbG93IHN1bW1hcnkgc3RhdHM6CmBgYHtyfQpzdW1tYXJ5KENsZWFuX0RGKQpgYGAKCmBgYHtyfQpDbGVhbl9ERiRzZXg8LWFzLmZhY3RvcihDbGVhbl9ERiRzZXgpCmBgYAoKYGBge3J9CnN1bW1hcnkoQ2xlYW5fREYpCmBgYAoKYGBge3J9CmJveHBsb3QoQ2xlYW5fREZbMzo1XSkKYGBgCgpgYGB7cn0KYm94cGxvdChDbGVhbl9ERiRib2R5X21hc3NfZykKYGBgCgpgYGB7cn0KcDwtZ2dwbG90KENsZWFuX0RGLCBhZXMoc2V4LCBib2R5X21hc3NfZywgZmlsbD1zZXgpKSsKICBnZW9tX2JveHBsb3QoKQoKZ2dwbG90bHkocCkKYGBgCgpgYGB7cn0KcDwtZ2dwbG90KENsZWFuX0RGLCBhZXMoc2V4LCBjdWxtZW5fbGVuZ3RoX21tLCBmaWxsPXNleCkpKwogIGdlb21fYm94cGxvdCgpCgpnZ3Bsb3RseShwKQpgYGAKCgpgYGB7cn0KcDwtZ2dwbG90KENsZWFuX0RGLCBhZXMoc2V4LCBjdWxtZW5fZGVwdGhfbW0sIGZpbGw9c2V4KSkrCiAgZ2VvbV9ib3hwbG90KCkKCmdncGxvdGx5KHApCmBgYAoKYGBge3J9CnA8LWdncGxvdChDbGVhbl9ERiwgYWVzKHNleCwgZmxpcHBlcl9sZW5ndGhfbW0sIGZpbGw9c2V4KSkrCiAgZ2VvbV9ib3hwbG90KCkKCmdncGxvdGx5KHApCmBgYAoKTG9va2luZyBhdCBi
b2R5IG1lYXN1cmVzIGJ5IFNwZWNpZXMKCmBgYHtyfQpwPC1nZ3Bsb3QoQ2xlYW5fREYsIGFlcyhzcGVjaWVzLCBib2R5X21hc3NfZywgZmlsbD1zcGVjaWVzKSkrCiAgZ2VvbV9ib3hwbG90KCkKCmdncGxvdGx5KHApCmBgYAoKYGBge3J9CnA8LWdncGxvdChDbGVhbl9ERiwgYWVzKHNwZWNpZXMsIGN1bG1lbl9sZW5ndGhfbW0sIGZpbGw9c3BlY2llcykpKwogIGdlb21fYm94cGxvdCgpCgpnZ3Bsb3RseShwKQpgYGAKCgpgYGB7cn0KcDwtZ2dwbG90KENsZWFuX0RGLCBhZXMoc3BlY2llcywgY3VsbWVuX2RlcHRoX21tLCBmaWxsPXNwZWNpZXMpKSsKICBnZW9tX2JveHBsb3QoKQoKZ2dwbG90bHkocCkKYGBgCgpgYGB7cn0KcDwtZ2dwbG90KENsZWFuX0RGLCBhZXMoc3BlY2llcywgZmxpcHBlcl9sZW5ndGhfbW0sIGZpbGw9c3BlY2llcykpKwogIGdlb21fYm94cGxvdCgpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpwPC1nZ3Bsb3QoZGF0YSA9IENsZWFuX0RGKSArIAogIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IHNwZWNpZXMsIGZpbGw9c3BlY2llcykpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpwPC1nZ3Bsb3QoZGF0YSA9IENsZWFuX0RGKSArIAogIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IGlzbGFuZCwgZmlsbD1zcGVjaWVzKSkKCmdncGxvdGx5KHApCmBgYAoKCgpgYGB7cn0KcDwtZ2dwbG90KGRhdGEgPSBDbGVhbl9ERikgKyAKICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IGN1bG1lbl9sZW5ndGhfbW0sIHkgPSBjdWxtZW5fZGVwdGhfbW0sY29sb3IgPSBzcGVjaWVzLCBzaGFwZT1pc2xhbmQpKQoKZ2dwbG90bHkocCkKYGBgCgpgYGB7cn0KcDwtZ2dwbG90KGRhdGEgPSBDbGVhbl9ERikgKyAKICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IGN1bG1lbl9sZW5ndGhfbW0sIHkgPSBmbGlwcGVyX2xlbmd0aF9tbSxjb2xvciA9IHNwZWNpZXMsIHNoYXBlPWlzbGFuZCkpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpwPC1nZ3Bsb3QoZGF0YSA9IENsZWFuX0RGKSArIAogIGdlb21fcG9pbnQobWFwcGluZyA9IGFlcyh4ID0gY3VsbWVuX2xlbmd0aF9tbSwgeSA9IGJvZHlfbWFzc19nLCBjb2xvciA9IHNwZWNpZXMsIHNoYXBlPWlzbGFuZCkpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KGNvcnJwbG90KQpNPC1jb3IoQ2xlYW5fREZbMzo2XSkKY29ycnBsb3QoTSxtZXRob2Q9ImNvbG9yIixhZGRDb2VmLmNvbCA9ICJ3aGl0ZSIpCmBgYAoK